package SparkSmallPaper
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.log4j.{Level, Logger}
//Field layout: x(0)=buyer id, x(1)=item id, x(2)=item category, x(3)=seller id, x(4)=brand id,
//x(5)=transaction month, x(6)=transaction day, x(7)=buyer action, x(8)=buyer age group,
//x(9)=buyer gender, x(10)=shipping address.
//This job compares transaction counts between male and female buyers.
// Counts transactions per buyer gender from the user-log CSV and prints
// one (gender, count) line per group plus the overall record total.
object demo02 {
    def main(args: Array[String]): Unit = {
        // Name the app after this object; the original mistakenly referenced
        // demo01, which mislabels the job in the Spark UI.
        val conf = new SparkConf().setMaster("local[2]").setAppName(demo02.getClass.getSimpleName)
        Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
        val sc = new SparkContext(conf)
        transformationOps5(sc)
        sc.stop()
    }

  // Reads the user log, drops the header row, and prints per-gender
  // transaction counts (field x(9) is the buyer's gender).
  def transformationOps5(sc: SparkContext): Unit = {
        val lines = sc.textFile("file:///C:/Users/asus/Desktop/hadoop_experiment/data/user_log.csv")
        // zipWithIndex tags each record with its row number; filtering index >= 1
        // skips the CSV header line.
        val wordsRDD = lines.map(line => line.split(",")).zipWithIndex().filter(_._2 >= 1).keys
        // One (gender, 1) pair per transaction record.
        val userAction = wordsRDD.map(x => (x(9), 1))
        // reduceByKey aggregates map-side before the shuffle; the original
        // groupByKey().map(_._2.size) shipped every single value across the
        // network just to take the group size. Same result, far less traffic.
        val outRDD = userAction.reduceByKey(_ + _)
        outRDD.foreach(x => println("性别:" + x._1, "人数:" + x._2))
        println("总人数:" + userAction.count())
    }
}